import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
# Location of the App Store dataset (machine-specific absolute path).
add = "C:/Users/lenovo/Desktop/applestore(1).csv"
data = pd.read_csv(add)
print(data.head())

# Goal: visualize the distribution of paid apps.

# Apps priced above this cap are treated as outliers. Keeping the threshold
# in one place guarantees the outlier and paid-app filters stay consistent.
PRICE_CAP = 50

# Boolean masks are computed once and reused for both counting and filtering.
is_free = data.price == 0
is_outlier = data.price > PRICE_CAP

# Fact generator: headline counts printed before any filtering.
# Series.sum() counts the True entries without a Python-level loop.
free_count = is_free.sum()
outlier_count = is_outlier.sum()
print ('1. Free apps are ' + str(free_count))
print ('2. Counting (outliers) super expensive apps ' + str(outlier_count))
print (' - which is around ' + str(outlier_count/len(data.price)*100) +
" % of the total Apps")

# Set the outliers (price above PRICE_CAP) aside, keeping only the columns
# that are useful for inspecting them.
outlier = data.loc[is_outlier, ['track_name', 'price', 'prime_genre', 'user_rating']]
freeapps = data[is_free]
# Bare expression: only displays in a notebook / interactive session.
outlier

# Remove free apps and outliers. The upper bound is inclusive so that an app
# priced exactly at PRICE_CAP is kept: it is not an outlier (price > cap) and
# would otherwise fall into neither group.
paidapps = data[(data.price > 0) & (data.price <= PRICE_CAP)]
# Series.max()/min() skip NaN and return NaN for an empty selection rather
# than raising like the builtins do.
print('Now the max price of any app in new data is : ' + str(paidapps.price.max()))
print('Now the min price of any app in new data is : ' + str(paidapps.price.min()))
#paidapps.prime_genre.value_counts()
plt.style.use('fivethirtyeight')